package spider.hysrlzy;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import spider.Capturer;

import java.io.IOException;
import java.net.URL;
import java.util.*;

/**
 * {@link Capturer} that extracts category groups from an HTML page.
 *
 * <p>Every {@code <ul>} that is a direct child of an element with class
 * {@code sJobTypec} is treated as one category group. The text of the first
 * {@code .sJobTypec1} element inside it becomes the group name, and the text of
 * every {@code .sJobTypec2 a} link inside it becomes one child entry.
 */
public class CategoryDataCapturer implements Capturer {
    /** Timeout, in milliseconds, handed to jsoup when fetching the page. */
    private static final int FETCH_TIMEOUT_MILLIS = 5000;

    /**
     * Downloads the page at {@code url} and extracts its category groups.
     *
     * @param url address of the page to fetch and parse
     * @return one map per category group, in document order, with the keys
     *         {@code "name"} (group title) and {@code "children"} (child titles,
     *         each followed by a {@code '\n'}); an empty list when the page has no
     *         matching groups or when the URL is malformed or the page cannot be
     *         fetched
     */
    @Override
    public List<Map<String, String>> capture(String url) {
        try {
            Document doc = Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);
            List<Map<String, String>> ret = new ArrayList<>();
            // select() yields an empty collection when nothing matches, so no size guard is needed.
            for (Element ul : doc.select(".sJobTypec>ul")) {
                ret.add(toCategory(ul));
            }
            return ret;
        } catch (IOException e) {
            // MalformedURLException is an IOException, so bad URLs end up here as well.
            System.err.println("Failed to capture category data from: " + url);
            e.printStackTrace();
        }
        // On failure, return an empty collection.
        return Collections.emptyList();
    }

    /** Builds the {@code name}/{@code children} map for a single category list element. */
    private static Map<String, String> toCategory(Element ul) {
        // selectFirst() returns null when nothing matches; fall back to an empty name
        // instead of letting a NullPointerException abort the whole capture.
        Element header = ul.selectFirst(".sJobTypec1");
        String parent = header == null ? "" : header.text();

        // StringBuilder avoids re-copying the accumulated text on every iteration.
        StringBuilder children = new StringBuilder();
        for (Element link : ul.select(".sJobTypec2 a")) {
            children.append(link.text()).append('\n');
        }

        Map<String, String> category = new HashMap<>();
        category.put("name", parent);
        category.put("children", children.toString());
        return category;
    }
}
